PRACTICA 2 VISUALIZACION DE DATOS¶

Perdida de clientes bancarios¶

In [1]:
pip install dash plotly
Requirement already satisfied: dash in c:\users\jlara\anaconda3\lib\site-packages (2.14.2)
Requirement already satisfied: plotly in c:\users\jlara\anaconda3\lib\site-packages (5.9.0)
Requirement already satisfied: Flask<3.1,>=1.0.4 in c:\users\jlara\anaconda3\lib\site-packages (from dash) (2.2.2)
Requirement already satisfied: Werkzeug<3.1 in c:\users\jlara\anaconda3\lib\site-packages (from dash) (2.2.3)
Requirement already satisfied: dash-html-components==2.0.0 in c:\users\jlara\anaconda3\lib\site-packages (from dash) (2.0.0)
Requirement already satisfied: dash-core-components==2.0.0 in c:\users\jlara\anaconda3\lib\site-packages (from dash) (2.0.0)
Requirement already satisfied: dash-table==5.0.0 in c:\users\jlara\anaconda3\lib\site-packages (from dash) (5.0.0)
Requirement already satisfied: typing-extensions>=4.1.1 in c:\users\jlara\anaconda3\lib\site-packages (from dash) (4.7.1)
Requirement already satisfied: requests in c:\users\jlara\anaconda3\lib\site-packages (from dash) (2.31.0)
Requirement already satisfied: retrying in c:\users\jlara\anaconda3\lib\site-packages (from dash) (1.3.4)
Requirement already satisfied: ansi2html in c:\users\jlara\anaconda3\lib\site-packages (from dash) (1.9.1)
Requirement already satisfied: nest-asyncio in c:\users\jlara\anaconda3\lib\site-packages (from dash) (1.5.6)
Requirement already satisfied: setuptools in c:\users\jlara\anaconda3\lib\site-packages (from dash) (68.0.0)
Requirement already satisfied: importlib-metadata in c:\users\jlara\anaconda3\lib\site-packages (from dash) (6.0.0)
Requirement already satisfied: tenacity>=6.2.0 in c:\users\jlara\anaconda3\lib\site-packages (from plotly) (8.2.2)
Requirement already satisfied: Jinja2>=3.0 in c:\users\jlara\anaconda3\lib\site-packages (from Flask<3.1,>=1.0.4->dash) (3.1.2)
Requirement already satisfied: itsdangerous>=2.0 in c:\users\jlara\anaconda3\lib\site-packages (from Flask<3.1,>=1.0.4->dash) (2.0.1)
Requirement already satisfied: click>=8.0 in c:\users\jlara\anaconda3\lib\site-packages (from Flask<3.1,>=1.0.4->dash) (8.0.4)
Requirement already satisfied: MarkupSafe>=2.1.1 in c:\users\jlara\anaconda3\lib\site-packages (from Werkzeug<3.1->dash) (2.1.1)
Requirement already satisfied: zipp>=0.5 in c:\users\jlara\anaconda3\lib\site-packages (from importlib-metadata->dash) (3.11.0)
Requirement already satisfied: charset-normalizer<4,>=2 in c:\users\jlara\anaconda3\lib\site-packages (from requests->dash) (2.0.4)
Requirement already satisfied: idna<4,>=2.5 in c:\users\jlara\anaconda3\lib\site-packages (from requests->dash) (3.4)
Requirement already satisfied: urllib3<3,>=1.21.1 in c:\users\jlara\anaconda3\lib\site-packages (from requests->dash) (1.26.16)
Requirement already satisfied: certifi>=2017.4.17 in c:\users\jlara\anaconda3\lib\site-packages (from requests->dash) (2023.11.17)
Requirement already satisfied: six>=1.7.0 in c:\users\jlara\anaconda3\lib\site-packages (from retrying->dash) (1.16.0)
Requirement already satisfied: colorama in c:\users\jlara\anaconda3\lib\site-packages (from click>=8.0->Flask<3.1,>=1.0.4->dash) (0.4.6)
Note: you may need to restart the kernel to use updated packages.
In [2]:
import pandas as pd
df = pd.read_csv("clientes.csv")
df = df.dropna()
print (df)
       RowNumber  CustomerId    Surname  CreditScore Geography  Gender   Age  \
0              1    15634602   Hargrave          619    France  Female  42.0   
1              2    15647311       Hill          608     Spain  Female  41.0   
2              3    15619304       Onio          502    France  Female  42.0   
3              4    15701354       Boni          699    France  Female  39.0   
5              6    15574012        Chu          645     Spain    Male  44.0   
...          ...         ...        ...          ...       ...     ...   ...   
9997        9998    15584532        Liu          709    France  Female  36.0   
9998        9999    15682355  Sabbatini          772   Germany    Male  42.0   
9999        9999    15682355  Sabbatini          772   Germany    Male  42.0   
10000      10000    15628319     Walker          792    France  Female  28.0   
10001      10000    15628319     Walker          792    France  Female  28.0   

       Tenure    Balance  NumOfProducts  HasCrCard  IsActiveMember  \
0           2       0.00              1        1.0             1.0   
1           1   83807.86              1        0.0             1.0   
2           8  159660.80              3        1.0             0.0   
3           1       0.00              2        0.0             0.0   
5           8  113755.78              2        1.0             0.0   
...       ...        ...            ...        ...             ...   
9997        7       0.00              1        0.0             1.0   
9998        3   75075.31              2        1.0             0.0   
9999        3   75075.31              2        1.0             0.0   
10000       4  130142.79              1        1.0             0.0   
10001       4  130142.79              1        1.0             0.0   

       EstimatedSalary  Exited  
0            101348.88       1  
1            112542.58       0  
2            113931.57       1  
3             93826.63       0  
5            149756.71       1  
...                ...     ...  
9997          42085.58       1  
9998          92888.52       1  
9999          92888.52       1  
10000         38190.78       0  
10001         38190.78       0  

[9998 rows x 14 columns]

ABANDONO DE CLIENTES EN FUNCION DEL GENERO¶

In [3]:
import seaborn as sns
import matplotlib.pyplot as plt

sns.countplot(x='Gender', hue='Exited', data=df)
plt.title('Abandono en función del Género')
plt.show()

ABANDONO DE CLIENTES EN FUNCION DE LOS AÑOS DE PERMANENCIA EN EL BANCO¶

In [7]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.express as px
import pandas as pd

df_grouped = df.groupby('Tenure')['Exited'].value_counts(normalize=True).unstack().fillna(0)

sorted_tenure_values = sorted(df['Tenure'].unique())

app = dash.Dash(__name__)

app.layout = html.Div([
    html.H1("analisis de abandono de clientes en funcion de los años de permanencia "),

    dcc.Dropdown(
        id='tenure-dropdown',
        options=[{'label': f'permanencia {tenure} años', 'value': tenure} for tenure in sorted_tenure_values],
        value=min(sorted_tenure_values),
    ),

    dcc.Graph(id='churn-pie-chart'),
])

@app.callback(
    Output('churn-pie-chart', 'figure'),
    [Input('tenure-dropdown', 'value')]
)
def update_pie_chart(selected_tenure):

    selected_data = df[df['Tenure'] == selected_tenure]

    fig = px.pie(selected_data, names='Exited', title=f'analisis cuando el cliente lleva en el banco {selected_tenure} años',
                 labels={'Exited': 'Churn Status'}, hole=0.4)

    return fig

if __name__ == '__main__':
    app.run_server(debug=True, port=8051)

ABANDONO DEL CLIENTE EN FUNCION DE SI MANTIENE MAS O MENOS ACTIVOS FINANCIEROS EN EL BANCO.¶

In [8]:
import plotly.express as px
import pandas as pd

df_grouped = df.groupby('NumOfProducts')['Exited'].mean().reset_index()

fig = px.bar(df_grouped, x='NumOfProducts', y='Exited', color='NumOfProducts',
             labels={'NumOfProducts': 'Número de Productos Financieros', 'Exited': 'Tasa de Abandono'},
             title='Tasa de Abandono del Cliente en función del Número de Productos Financieros')

fig.show()
In [9]:
import plotly.subplots
import plotly.graph_objects as go
import pandas as pd

df_credit_card_no_churn = df[(df['HasCrCard'] == 1) & (df['Exited'] == 0)]
df_credit_card_churn = df[(df['HasCrCard'] == 1) & (df['Exited'] == 1)]

df_no_credit_card_no_churn = df[(df['HasCrCard'] == 0) & (df['Exited'] == 0)]
df_no_credit_card_churn = df[(df['HasCrCard'] == 0) & (df['Exited'] == 1)]

fig = plotly.subplots.make_subplots(rows=1, cols=1, subplot_titles=['Todos los Países'], shared_xaxes=True, shared_yaxes=True)

color_palette = px.colors.qualitative.Set1

fig.add_trace(go.Bar(x=['Con Tarjeta de Crédito', 'Sin Tarjeta de Crédito'],
                     y=[len(df[(df['HasCrCard'] == 1) & (df['Exited'] == 0)]),
                        len(df[(df['HasCrCard'] == 0) & (df['Exited'] == 0)])],
                     name='Todos los Países', marker_color=['lightblue', 'lightgreen']),
              row=1, col=1)

buttons = [dict(label='Todos los Países',
                method='update',
                args=[{'visible': [True]}, {'title': 'Todos los Países'}])]

for i, country in enumerate(df['Geography'].unique()):

    df_country = df[df['Geography'] == country]

    color = color_palette[i % len(color_palette)]

    fig.add_trace(go.Bar(x=['Con Tarjeta de Crédito', 'Sin Tarjeta de Crédito'],
                         y=[len(df_country[(df_country['HasCrCard'] == 1) & (df_country['Exited'] == 0)]),
                            len(df_country[(df_country['HasCrCard'] == 0) & (df_country['Exited'] == 0)])],
                         name=country, marker_color=color),
                  row=1, col=1)

    buttons.append(dict(label=country,
                        method='update',
                        args=[{'visible': [False] * len(buttons) + [True]}, {'title': f'Clientes con/sin Tarjeta de Crédito y Abandono en {country}'}]))

fig.update_layout(updatemenus=[dict(type='dropdown', showactive=True, buttons=buttons, x=0.1, xanchor='left', y=1.1, yanchor='top')])

fig.show()
In [10]:
fig = px.scatter_3d(df, x='CreditScore', y='Balance', z='Age', color='Exited',
                    title='Gráfico de Dispersión 3D para CreditScore, Balance y Edad',
                    labels={'CreditScore': 'Puntuación de Crédito', 'Balance': 'Saldo', 'Age': 'Edad', 'Exited': 'Abandono'})
fig.show()
In [11]:
import seaborn as sns
import matplotlib.pyplot as plt

numeric_df = df[['CreditScore', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard', 'IsActiveMember', 'EstimatedSalary', 'Exited']]

correlation_matrix = numeric_df.corr()

plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f')
plt.title('Mapa de Calor para Correlaciones entre Variables')
plt.show()